******************************************************************************************
* Do-file name:	cr_treat_control_sample.do                                               
* Task:         generate grid of individuals, merge employment and unemployment data     
* Last change:  18.03.2019                                                               
******************************************************************************************



******************************************************************************************
*** program setup
******************************************************************************************

version 14.2
clear all
macro drop _all
set linesize 90
set more off
* set trace on
discard
set seed 123456789


******************************************************************************************
*** set macros
******************************************************************************************

global start_year 	"1980"
global end_year 	"2000"


******************************************************************************************
*** extract vsnr ever worked in border or control region
******************************************************************************************

forvalues x = $start_year / $end_year {
use "data\orig/clean`x'.dta", clear

** drop variables
keep vsnr_ano ao_gem

** work place
replace ao_gem = . if ao_gem < 0
label variable ao_gem  "workplace: municipality/gemeinde"

gen ao_kreis = int(ao_gem/1000)
replace ao_kreis = .  if ao_kreis < 0 | ao_kreis >= 99999
replace ao_kreis = 11000 if ao_kreis == 11100 | ao_kreis == 11200  // Recode all parts of Berlin into one code
label variable ao_kreis "workplace: district/landkreis"
drop if ao_kreis == .

** flag districts eligible under the commuting policy
gen border = (ao_kreis == 9262 | ao_kreis == 9263 | ao_kreis == 9271 | ao_kreis == 9272 | ao_kreis == 9275 | ///
			  ao_kreis == 9276 | ao_kreis == 9278 | ao_kreis == 9361 | ao_kreis == 9363 | ao_kreis == 9371 | ///
			  ao_kreis == 9372 | ao_kreis == 9374 | ao_kreis == 9376 | ao_kreis == 9377 | ao_kreis == 9462 | ///
			  ao_kreis == 9464 | ao_kreis == 9472 | ao_kreis == 9475 | ao_kreis == 9476 | ao_kreis == 9477 | ///
			  ao_kreis == 9479)
replace border = .  if ao_kreis == . 
label variable border "1 if district eligible under the commuting scheme, 0 otherwise"

** flag control districts
gen control = (ao_kreis == 1051 | ao_kreis == 1054 | ao_kreis == 3358 | ao_kreis == 3453 | ao_kreis == 3454 | ///
			   ao_kreis == 3456 | ao_kreis == 3462 | ao_kreis == 7211 | ao_kreis == 7231 | ao_kreis == 7232 | ///
			   ao_kreis == 7233 | ao_kreis == 7235 | ao_kreis == 9180 | ao_kreis == 9183 | ao_kreis == 9261 | ///
			   ao_kreis == 9274 | ao_kreis == 9277 | ao_kreis == 9373 | ao_kreis == 9561 | ao_kreis == 9571 | ///
			   ao_kreis == 9575 | ao_kreis == 9577 | ao_kreis == 9777 | ao_kreis == 9780)
replace control = .  if ao_kreis == . 
label variable control "1 if district is control region, 0 otherwise"

** keep individuals in border or control regions
keep if border == 1 | control == 1

** clean data
drop ao_gem ao_kreis border control
compress

** append yearly data to one dataset
if `x' == $start_year {
save "data/grid_treat_cont.dta", replace 
	}
else {
append using "data/grid_treat_cont.dta"
sort vsnr_ano 
duplicates drop vsnr_ano, force
save "data/grid_treat_cont.dta", replace
	}
	}


******************************************************************************************
*** expand dataset to balanced panel
******************************************************************************************

gen year = 1980
expand 21, gen(duplicate)
bys vsnr_ano (duplicate): replace year = year + _n-1
drop duplicate
gen status = .

compress
save "data/grid_treat_cont.dta", replace


******************************************************************************************
*** merge employment data to grid
******************************************************************************************

** prepare employment data for merge with grid
forvalues x = $start_year / $end_year {
use "data\orig/clean`x'.dta", clear
keep bnr_ano_n vsnr_ano dauer sex staat wo_gem beruford berufstg ausbild pers_gr ///
     tag_entg wz?? ao_gem alter estsize ten 
gen year = `x'
order vsnr_ano year
sort vsnr_ano year
save "data/empl_year_`x'.dta", replace


** merge employment data to grid
if `x' == $start_year {
use "data/grid_treat_cont.dta", clear
merge 1:1 vsnr_ano year using "data/empl_year_`x'.dta"
drop if _merge == 2
replace status = 1  if _merge == 3
drop _merge
save "data/grid_treat_cont_empl.dta", replace
erase "data/empl_year_`x'.dta"
	}
else {
use "data/grid_treat_cont_empl.dta", clear
merge 1:1 vsnr_ano year using "data/empl_year_`x'.dta", update
drop if _merge == 2
replace status = 1  if _merge == 4
drop _merge
save "data/grid_treat_cont_empl.dta", replace
erase "data/empl_year_`x'.dta"
	}
	}


******************************************************************************************
*** merge unemployment data to grid
******************************************************************************************

use "data/grid_treat_cont_empl.dta", clear
merge 1:1 vsnr_ano year using "data/unempl_all_clean.dta", update
drop if _merge == 2
gen double_spell = 1  if _merge == 5
label var double_spell "obs. has both empl. & umempl. spell"
drop _merge


******************************************************************************************
*** mark out-of-labor force observations 
******************************************************************************************

replace status = 3  if status == .		// out-of-labor force
label var status "labor force status: 1=empl, 2=unempl, 3=out of lf"


******************************************************************************************
*** fill in missing work municipality with past values (max 4 years back in past)
******************************************************************************************

*** correct ao_gem values
replace ao_gem = . if ao_gem < 0
replace ao_gem = 11000000  if ao_gem == 11100000 | ao_gem == 11200000  // Recode all parts of Berlin into one code
label variable ao_gem  "workplace: municipality/gemeinde (original)"

*** fill in missings with past values: using max 4 years lagged
gen long ao_gem_imp = ao_gem
gen imp_lag_ao_gem  = 0  if ao_gem_imp != .
forvalues x = 1/4 {
bys vsnr_ano (year): replace ao_gem_imp = ao_gem[_n-`x']  if ao_gem_imp == . & ao_gem[_n-`x'] != .
bys vsnr_ano (year): replace imp_lag_ao_gem = 0-`x'  if imp_lag_ao_gem == . & ao_gem_imp != . & ao_gem[_n-`x'] != .
 }
label variable ao_gem_imp  "workplace: municipality/gemeinde (imputed, years used: max 4 years lagged)"
label variable imp_lag_ao_gem  "# of years going back to impute workplace (based on ao_gem_imp, max 4 years)"

*** create work district variable for imputed version of the work municipality variable
gen ao_kreis_imp = int(ao_gem_imp/1000)
replace ao_kreis_imp = .  if ao_kreis_imp < 0 | ao_kreis_imp >= 99999
replace ao_kreis_imp = 11000  if ao_kreis_imp == 11100 | ao_kreis_imp == 11200  // Recode all parts of Berlin into one code
label variable ao_kreis_imp "workplace: district/landk. (based on ao_gem_imp, years used: max 4 y. lagged)"

*** flag districts eligible under the commuting policy
gen border_imp = (ao_kreis_imp == 9262 | ao_kreis_imp == 9263 | ao_kreis_imp == 9271 | ao_kreis_imp == 9272 | ao_kreis_imp == 9275 | ///
			      ao_kreis_imp == 9276 | ao_kreis_imp == 9278 | ao_kreis_imp == 9361 | ao_kreis_imp == 9363 | ao_kreis_imp == 9371 | ///
			      ao_kreis_imp == 9372 | ao_kreis_imp == 9374 | ao_kreis_imp == 9376 | ao_kreis_imp == 9377 | ao_kreis_imp == 9462 | ///
			      ao_kreis_imp == 9464 | ao_kreis_imp == 9472 | ao_kreis_imp == 9475 | ao_kreis_imp == 9476 | ao_kreis_imp == 9477 | ///
			      ao_kreis_imp == 9479)
replace border_imp = .  if ao_gem_imp == . 
label variable border_imp "1 if district eligible under the commuting scheme (based on ao_gem_imp)"

** flag control districts
gen control_imp = (ao_kreis_imp == 1051 | ao_kreis_imp == 1054 | ao_kreis_imp == 3358 | ao_kreis_imp == 3453 | ao_kreis_imp == 3454 | ///
				   ao_kreis_imp == 3456 | ao_kreis_imp == 3462 | ao_kreis_imp == 7211 | ao_kreis_imp == 7231 | ao_kreis_imp == 7232 | ///
			       ao_kreis_imp == 7233 | ao_kreis_imp == 7235 | ao_kreis_imp == 9180 | ao_kreis_imp == 9183 | ao_kreis_imp == 9261 | ///
			       ao_kreis_imp == 9274 | ao_kreis_imp == 9277 | ao_kreis_imp == 9373 | ao_kreis_imp == 9561 | ao_kreis_imp == 9571 | ///
			       ao_kreis_imp == 9575 | ao_kreis_imp == 9577 | ao_kreis_imp == 9777 | ao_kreis_imp == 9780)
replace control_imp = .  if ao_gem_imp == . 
label variable control_imp "1 if district is control region (based on ao_gem_imp)"


******************************************************************************************
*** flag persons in individual analysis sample 
******************************************************************************************

gen help_1 = 1  if (border_imp == 1 | control_imp == 1) & year == 1990
replace help_1 = 0  if help_1 == .
bys vsnr_ano (year): egen sample_indiv = max(help_1)
label var sample_indiv "1 if individual belongs to individual sample"
drop help_1


******************************************************************************************
*** save new dataset
******************************************************************************************

order vsnr_ano year bnr_ano_n ao_gem ao_gem_imp imp_lag_ao_gem ao_kreis_imp border_imp control_imp
compress
label data "balanced sample of individuals ever worked in treatment or control regions (years: 1980-2000)"
save "data/treat_control_sample.dta", replace

** delete old data
erase "data/grid_treat_cont.dta"
erase "data/grid_treat_cont_empl.dta"


******************************************************************************************
*** end
******************************************************************************************

exit


*========================================================================================*
Comments:
- unique identifier: vsnr_ano
